##########################################################################################

library(data.table)
library(optparse)

##########################################################################################

## Command-line options. Each one takes a file path; the script reads the
## four input tables and writes one output table.
option_list <- list(
    make_option(c("--input_file"), type = "character",
        help = "Sample table to annotate with overall survival"),
    make_option(c("--njmu_info_file"), type = "character",
        help = "NJMU clinical table (columns Patient, OS_month, OS_status)"),
    make_option(c("--tcga_info_file"), type = "character",
        help = "TCGA clinical table (patient ID, overall survival months and status)"),
    make_option(c("--tmucih_info_file"), type = "character",
        help = "TMUCIH clinical table (patient ID, overall survival months and status)"),
    make_option(c("--out_file"), type = "character",
        help = "Path of the tab-separated annotated table to write")
)

## Hard-coded paths for stepping through the script by hand. The condition is
## always FALSE, so none of these assignments run when the script is executed.
## NOTE(review): this block sets `out_path` (a directory), whereas the rest of
## the script writes to `out_file` -- confirm which one is intended.
if (FALSE) {

    out_path <- "~/20220915_gastric_multiple/dna_combinePublic/public_ref/combine"
    tmucih_info_file <- "~/20220915_gastric_multiple/dna_combinePublic/public_ref/TMUCIH/egc_tmucih_2015_clinical_data.tsv"
    tcga_info_file <- "~/20220915_gastric_multiple/dna_combinePublic/public_ref/TCGA/stad_tcga_pan_can_atlas_2018_clinical_data.tsv"
    njmu_info_file <- "~/20220915_gastric_multiple/dna_combinePublic/config/STAD_MutipleReigon_baseline.addAlcoholFreq.tsv"
    input_file <- "~/20220915_gastric_multiple/dna_combinePublic/public_ref/combine/MutationInfo.combine.addMolecularSubType.rmMIX.tsv"

}

###########################################################################################

## Parse the command line and echo the parsed options to stdout.
parseobj <- OptionParser(option_list = option_list, usage = "usage: Rscript %prog [options]")
opt <- parse_args(parseobj)
print(opt)

## Every path is mandatory. An omitted option is simply absent from `opt`
## (NULL), which would otherwise only surface later as an unrelated error from
## fread() or write.table(); fail here instead and name what is missing.
required_opts <- c("input_file", "njmu_info_file", "tcga_info_file", "tmucih_info_file", "out_file")
is_missing <- vapply(required_opts, function(nm) is.null(opt[[nm]]), logical(1))
if (any(is_missing)) {
    print_help(parseobj)
    stop(
        "Missing required option(s): ",
        paste0("--", required_opts[is_missing], collapse = ", "),
        call. = FALSE
    )
}

input_file <- opt$input_file
tcga_info_file <- opt$tcga_info_file
tmucih_info_file <- opt$tmucih_info_file
njmu_info_file <- opt$njmu_info_file
out_file <- opt$out_file

###########################################################################################

## Load the four input tables as plain data.frames.
## fread() parses the file; passing the result through data.frame() also
## sanitises the column names (default check.names = TRUE), which the
## dotted column names used further down presumably depend on.
read_as_data_frame <- function(path) {
    data.frame(fread(path))
}

dat_info   <- read_as_data_frame(input_file)
dat_njmu   <- read_as_data_frame(njmu_info_file)
dat_tcga   <- read_as_data_frame(tcga_info_file)
dat_tmucih <- read_as_data_frame(tmucih_info_file)

###########################################################################################
## Annotate prognosis (overall survival).

## Reduce a public-cohort clinical table to Tumor / OS_month / OS_status.
##   clinical: data.frame with the columns Patient.ID,
##             Overall.Survival..Months. and Overall.Survival.Status
##             (selecting them fails if any is absent).
## Returns a data.frame with columns Tumor, OS_month and OS_status; both OS
## columns are character. OS_status is the part of the status field before
## the first ":" converted to a number. Rows whose survival time is missing
## or 0 get "unknown" in both OS columns.
tidy_public_survival <- function(clinical) {
    sur <- clinical[, c("Patient.ID", "Overall.Survival..Months.", "Overall.Survival.Status")]

    ## Decide which rows are unusable while the months still have their
    ## original type, i.e. before "unknown" turns the column into character.
    months <- sur[["Overall.Survival..Months."]]
    no_follow_up <- is.na(months) | months == 0

    ## as.character() keeps strsplit() working when the column was read as a
    ## non-character type (e.g. an all-NA column).
    status_parts <- strsplit(as.character(sur[["Overall.Survival.Status"]]), ":")
    status <- as.numeric(vapply(status_parts, `[`, character(1), 1L))

    ## Assigning a string coerces the whole vector to character.
    status[no_follow_up] <- "unknown"
    months[no_follow_up] <- "unknown"

    sur[["Overall.Survival.Status"]] <- status
    sur[["Overall.Survival..Months."]] <- months
    colnames(sur) <- c("Tumor", "OS_month", "OS_status")
    sur
}

dat_tcga <- tidy_public_survival(dat_tcga)
dat_tmucih <- tidy_public_survival(dat_tmucih)

## The NJMU table already has OS_month / OS_status columns: keep them, mark
## every NA cell as "unknown" and rename Patient to Tumor.
## NOTE(review): unlike the two public cohorts, an OS_month of 0 is kept as-is
## here -- confirm that this difference is intended.
dat_njmu <- dat_njmu[, c("Patient", "OS_month", "OS_status")]
dat_njmu[is.na(dat_njmu)] <- "unknown"
colnames(dat_njmu) <- c("Tumor", "OS_month", "OS_status")

###########################################################################################
## Annotate: attach the survival columns to every row of the input table.
## Pool the three cohorts and drop exact duplicate rows.
dat_sur <- unique(rbind(dat_tcga, dat_tmucih, dat_njmu))

## merge() joins on whatever columns the two tables share. Without a Tumor
## column in the input there is no sample key to join on, and merge() could
## silently return a cartesian product, so stop early instead.
if (!"Tumor" %in% colnames(dat_info)) {
    stop("input_file has no 'Tumor' column to match survival records on", call. = FALSE)
}

## Left join: input rows without a survival record are kept and get NA,
## which is then replaced by "unknown".
result <- merge(dat_info, dat_sur, all.x = TRUE)
result$OS_month[is.na(result$OS_month)] <- "unknown"
result$OS_status[is.na(result$OS_status)] <- "unknown"

## Tab-separated output, no row names, no quoting.
write.table(result, out_file, row.names = FALSE, sep = "\t", quote = FALSE)